// bsls_spinlock.h                                                    -*-C++-*-
#ifndef INCLUDED_BSLS_SPINLOCK
#define INCLUDED_BSLS_SPINLOCK

#include <bsls_ident.h>
BSLS_IDENT("$: $")

//@PURPOSE: Provide a spin lock.
//
//@CLASSES:
//  bsls::SpinLock: A mutex using "busy waiting" atomic operations
//  bsls::SpinLockGuard: Automatic locking-unlocking of SpinLock
//
//@DESCRIPTION: This component provides a "busy wait" mutual exclusion lock
// primitive ("mutex").  A `SpinLock` is small and statically-initializable,
// but because it "spins" in a tight loop rather than using system operations
// to block the thread of execution, it is unsuited for use cases involving
// high contention or long critical regions.  Additionally, this component does
// not provide any guarantee of fairness when multiple threads are contending
// for the same lock.  Use `bsls::SpinLockGuard` for automatic
// locking-unlocking in a scope.
//
// *WARNING*: A `bsls::SpinLock` *must* be aggregate initialized to
// `BSLS_SPINLOCK_UNLOCKED`.  For example:
// ```
// bsls::SpinLock lock = BSLS_SPINLOCK_UNLOCKED;
// ```
// Note that `SpinLock` is a struct requiring aggregate initialization to allow
// lock variables to be statically initialized when using a C++03 compiler
// (i.e., without using `constexpr`).
//
///Usage
///-----
// In this section we show intended use of this component.
//
///Example 1: Maintaining Static Count/Max Values
/// - - - - - - - - - - - - - - - - - - - - - - -
// Suppose that we want to determine the maximum number of threads executing a
// block of code concurrently.  Note that such a use case naturally calls for a
// statically initialized lock and the critical region involves a few integer
// operations; SpinLock may be suitable.
//
// First, we define a type to manage the count within a scope:
// ```
// /// This type manages a count and high-water-mark within a scope.
// /// It decrements the count in its destructor upon leaving the scope.
// class MaxConcurrencyCounter {
//
//     // DATA
//     int            *d_count_p;
//     bsls::SpinLock *d_lock_p;
//
//   public:
//     // CREATORS
//
//     /// Acquire the specified `lock` and increment the specified `count`.
//     /// If the resulting value is larger than the specified `max`,
//     /// load it into `max`. Release `lock` and create a scoped guard to
//     /// decrement `count` on destruction.
//     MaxConcurrencyCounter(int *count, int *max, bsls::SpinLock *lock);
//
//     /// Acquire the lock specified at construction, decrement the count
//     /// variable, and release the lock.
//     ~MaxConcurrencyCounter();
//   };
//
//   MaxConcurrencyCounter::MaxConcurrencyCounter(int            *count,
//                                                int            *max,
//                                                bsls::SpinLock *lock)
//   : d_count_p(count)
//   , d_lock_p(lock) {
//      bsls::SpinLockGuard guard(lock);
//      int result = ++(*count);
//      if (result > *max) {
//         *max = result;
//      }
//   }
//
//   MaxConcurrencyCounter::~MaxConcurrencyCounter() {
//      bsls::SpinLockGuard guard(d_lock_p);
//      --(*d_count_p);
//   }
// ```
// Next, we declare static variables to track the call count and a SpinLock to
// guard them.  `SpinLock` may be statically initialized using the
// `BSLS_SPINLOCK_UNLOCKED` constant:
// ```
//  {
//     static int            threadCount = 0;
//     static int            maxThreads = 0;
//     static bsls::SpinLock threadLock = BSLS_SPINLOCK_UNLOCKED;
// ```
// Next, by creating a `MaxConcurrencyCounter` object, each thread entering the
// block of code uses the `SpinLock` to synchronize manipulation of the static
// count variables:
// ```
//     MaxConcurrencyCounter counter(&threadCount, &maxThreads, &threadLock);
// ```
// Finally, closing the block synchronizes on the `SpinLock` again to decrement
// the thread count.  Any intervening code can run in parallel.
// ```
//  }
// ```
//
///Example 2: Fine-Grained Locking
///- - - - - - - - - - - - - - - -
// Suppose that we have a large array of objects to be manipulated concurrently
// by multiple threads, but the size of the array itself does not change.
// (This might be because it represents an inherently fixed number of objects
// or because changes to the array size are infrequent and controlled by some
// other synchronization mechanism like a "reader-writer" lock).  Thus one
// thread can manipulate a particular object in the array concurrently with a
// different thread manipulating another.  If the manipulations are short and
// contention is likely to be low, SpinLock might be suitable due to its small
// size.
//
// In particular, imagine we want a threadsafe "multi-queue".  In this case, we
// would have an array of queues, each with a SpinLock member for fine-grained
// locking.  First, we define the type to be held in the array.
// ```
// /// This type implements a threadsafe queue with a small memory footprint
// /// and low initialization costs. It is designed for low-contention use
// /// only.
// template<typename TYPE>
// class LightweightThreadsafeQueue {
//
//    // TYPES
//    struct Node {
//         TYPE  d_item;
//         Node *d_next_p;
//
//         Node(const TYPE& item) : d_item(item), d_next_p(0) {}
//     };
//
//    // DATA
//    Node           *d_front_p; // Front of queue, or 0 if empty
//    Node           *d_back_p; // Back of queue, or 0 if empty
//    bsls::SpinLock  d_lock;
//
//  public:
//    // CREATORS
//
//    /// Create an empty queue.
//    LightweightThreadsafeQueue();
//
//    /// Destroy this object.
//    ~LightweightThreadsafeQueue();
//
//    // MANIPULATORS
//
//    /// Remove the element at the front of the queue and load it into the
//    /// specified `value`. Return `0` on success, or a nonzero value if
//    /// the queue is empty.
//    int dequeue(TYPE* value);
//
//    /// Add the specified `value` to the back of the queue.
//    void enqueue(const TYPE& value);
//  };
// ```
// Next, we implement the creators.  Note that a different idiom is used to
// initialize member variables of `SpinLock` type than is used for static
// variables:
// ```
// template<typename TYPE>
// LightweightThreadsafeQueue<TYPE>::LightweightThreadsafeQueue()
// : d_front_p(0)
// , d_back_p(0)
// , d_lock(bsls::SpinLock::s_unlocked)
// {}
//
// template<typename TYPE>
// LightweightThreadsafeQueue<TYPE>::~LightweightThreadsafeQueue() {
//    for (Node *node = d_front_p; 0 != node; ) {
//        Node *next = node->d_next_p;
//        delete node;
//        node = next;
//    }
// }
// ```
// Then we implement the manipulator functions using `SpinLockGuard` to ensure
// thread safety.  Note that we do memory allocation and deallocation outside
// the scope of the lock, as these may involve system calls that should be
// avoided in the scope of a SpinLock.
// ```
// template<typename TYPE>
// int LightweightThreadsafeQueue<TYPE>::dequeue(TYPE* value) {
//    Node *front;
//    {
//       bsls::SpinLockGuard guard(&d_lock);
//       front = d_front_p;
//       if (0 == front) {
//         return 1;
//       }
//
//       *value = front->d_item;
//
//       if (d_back_p == front) {
//          d_front_p = d_back_p = 0;
//       } else {
//          d_front_p = front->d_next_p;
//       }
//    }
//    delete front;
//    return 0;
// }
//
// template<typename TYPE>
// void LightweightThreadsafeQueue<TYPE>::enqueue(const TYPE& value) {
//    Node *node = new Node(value);
//    bsls::SpinLockGuard guard(&d_lock);
//    if (0 == d_front_p && 0 == d_back_p) {
//       d_front_p = d_back_p = node;
//    } else {
//       d_back_p->d_next_p = node;
//       d_back_p = node;
//    }
// }
// ```
// To illustrate fine-grained locking with this queue, we create a thread
// function that will manipulate queues out of a large array at random.
// Since each element in the array is locked independently, these threads
// will rarely contend for the same queue and can run largely in parallel.
// ```
// const int NUM_QUEUES = 10000;
// const int NUM_ITERATIONS = 20000;
//
// struct QueueElement {
//    int d_threadId;
//    int d_value;
// };
//
// struct ThreadParam {
//    LightweightThreadsafeQueue<QueueElement> *d_queues_p;
//    int                                       d_threadId;
// };
//
// void *addToRandomQueues(void *paramAddr) {
//    ThreadParam *param = (ThreadParam*)paramAddr;
//    LightweightThreadsafeQueue<QueueElement> *queues = param->d_queues_p;
//    int threadId = param->d_threadId;
//    unsigned seed = threadId;
//    for (int i = 0; i < NUM_ITERATIONS; ++i) {
//       int queueIndex = rand_r(&seed) % NUM_QUEUES;
//       LightweightThreadsafeQueue<QueueElement> *queue = queues + queueIndex;
//       QueueElement value = { threadId, i };
//       queue->enqueue(value);
//    }
//    return 0;
// }
// ```
// Finally, we create the "multi-queue" and several of these threads to
// manipulate it.  We assume the existence of a `createThread()` function
// that starts a new thread of execution with a parameter, and we omit
// details of "joining" these threads.
// ```
// enum { NUM_THREADS = 3};
// LightweightThreadsafeQueue<QueueElement> multiQueue[NUM_QUEUES];
// ThreadParam threadParams[NUM_THREADS];
// for (int i = 0; i < NUM_THREADS; ++i) {
//   threadParams[i].d_queues_p = multiQueue;
//   threadParams[i].d_threadId = i + 1;
//   createThread(addToRandomQueues, threadParams + i);
// }
// ```
//

#include <bsls_assert.h>
#include <bsls_atomicoperations.h>
#include <bsls_compilerfeatures.h>
#include <bsls_keyword.h>
#include <bsls_platform.h>
#include <bsls_performancehint.h>

#if (BSLS_COMPILERFEATURES_CPLUSPLUS < 201703L)
#define BSLS_SPINLOCK_USES_AGGREGATE_INITIALIZATION
#endif

/// Use this macro as the value for initializing an object of type
/// `SpinLock`.  For example:
/// ```
/// SpinLock lock = BSLS_SPINLOCK_UNLOCKED;
/// ```
#ifdef BSLS_SPINLOCK_USES_AGGREGATE_INITIALIZATION
#define BSLS_SPINLOCK_UNLOCKED  { {0} }
#else
#define BSLS_SPINLOCK_UNLOCKED (::BloombergLP::bsls::SpinLock::s_unlocked)
#endif

#ifdef BSLS_PLATFORM_OS_WINDOWS
typedef unsigned long DWORD;
typedef int BOOL;

extern "C" {
    __declspec(dllimport) void __stdcall Sleep(
            DWORD dwMilliseconds);

    __declspec(dllimport) DWORD __stdcall SleepEx(
            DWORD dwMilliseconds,
            BOOL  bAlertable);
};
#else
#include <pthread.h>
#include <sched.h>
#include <time.h>
#endif

namespace BloombergLP {
namespace bsls {

                   // ================================
                   // class SpinLock_MemberInitializer
                   // ================================

/// This component-private class is an empty tag type used to work around
/// legacy initialization syntax.  The only object of this type should be
/// `SpinLock::s_unlocked`, which should only ever be used to initialize
/// member variables of type `SpinLock`.
class SpinLock_MemberInitializer {
};

                             // ==============
                             // class SpinLock
                             // ==============

/// A statically-initializable synchronization primitive that "spins" (i.e.,
/// executes user instructions in a tight loop) rather than blocking waiting
/// threads using system calls.  The following idiom is used to initialize
/// `SpinLock` variables:
/// ```
/// SpinLock lock = BSLS_SPINLOCK_UNLOCKED;
/// ```
/// A class member `d_lock` of type `SpinLock` may be initialized using the
/// following idiom:
/// ```
/// , d_lock(SpinLock::s_unlocked)
/// ```
struct SpinLock {

  private:
    // NOT IMPLEMENTED
#ifdef BSLS_SPINLOCK_USES_AGGREGATE_INITIALIZATION
    SpinLock& operator=(const SpinLock&) BSLS_KEYWORD_DELETED;

    /// Note that we would like to prohibit copy construction, but then this
    /// class would not be an aggregate and could not be initialized
    /// statically in C++03 mode.
    //! SpinLock(const SpinLock&) = delete;
#else
    SpinLock& operator=(const SpinLock&) = delete;
    SpinLock(const SpinLock&) = delete;
#endif

    // PRIVATE TYPES
    enum {
        e_UNLOCKED = 0, // unlocked state value
        e_LOCKED = 1    // locked state value
    };

    // PRIVATE CLASS METHODS

    /// This function provides a backoff mechanism for `lock` and `tryLock`,
    /// using the specified `count` as the backoff counter.  `count` is
    /// incremented within this method.
    static void doBackoff(int *count);

    /// If available, invoke a pause operation (e.g., Intel's `pause`
    /// instruction); otherwise do nothing (i.e., this method is a no-op on
    /// platforms without such an instruction).
    static void pause();

    /// Sleep the specified `milliseconds`.  Note that this partially
    /// reimplements `bslmt::ThreadUtil::microSleep` locally, as `bslmt` is
    /// above `bsls`.
    static void sleepMillis(int milliseconds);

    /// Move the current thread to the end of the scheduler's queue and
    /// schedule another thread to run.  Note that this allows cooperating
    /// threads of the same priority to share CPU resources equally.  Also
    /// note that this reimplements `bslmt::ThreadUtil::yield()` locally, as
    /// `bslmt` is above `bsls`.
    static void yield();

#ifdef BSLS_SPINLOCK_USES_AGGREGATE_INITIALIZATION
  public:
    // PUBLIC DATA

    /// 0 when unlocked.  `d_state` is public to allow aggregate
    /// initialization where constexpr constructors are not available.  It
    /// is semantically private, and should not be used directly.
    AtomicOperations::AtomicTypes::Int d_state;
#else
  private:
    // DATA

    // 0 when unlocked.
    AtomicOperations::AtomicTypes::Int d_state;
#endif

  public:
    // CREATORS
#ifndef BSLS_SPINLOCK_USES_AGGREGATE_INITIALIZATION
    /// Create a `SpinLock` object in the unlocked state.
    constexpr SpinLock();

    /// Create a `SpinLock` object in the unlocked state.  Note that this is
    /// provided to allow for a syntax that was necessary in C++03, but is
    /// no longer necessary.
    constexpr SpinLock(const SpinLock_MemberInitializer&);          // IMPLICIT
#endif

    // PUBLIC CLASS DATA
#ifdef BSLS_SPINLOCK_USES_AGGREGATE_INITIALIZATION
    static const SpinLock s_unlocked;
#else
    static constexpr SpinLock_MemberInitializer s_unlocked = {};
#endif
        // This constant is always in the unlocked state.  It is suitable
        // for use in initializing class members of `SpinLock` type.

    // MANIPULATORS

    /// Spin (repeat a loop continuously without using the system to pause
    /// or reschedule the thread) until this object is unlocked, then
    /// atomically acquire the lock.
    void lock();

    /// Repeat a loop continuously, potentially using the system to pause or
    /// reschedule the thread, until this object is unlocked, then
    /// atomically acquire the lock.  The spinning has backoff logic.  Note
    /// that this method is recommended when system calls are permissible,
    /// significant contention is expected, and no better mutual exclusion
    /// primitive is available.
    void lockWithBackoff();

    /// Spin (repeat a loop continuously without using the system to pause
    /// or reschedule the thread) until this object is unlocked, then
    /// atomically acquire the lock.  The spinning does not perform a
    /// backoff.
    void lockWithoutBackoff();

    /// Attempt to acquire the lock; optionally specify the `numRetries`
    /// times to attempt again if this object is already locked.  Return 0
    /// on success, and a non-zero value if the lock was not successfully
    /// acquired.  The behavior is undefined unless `0 <= numRetries`.
    int tryLock(int numRetries = 0);

    /// Release the lock.  The behavior is undefined unless the current
    /// thread holds the lock.
    void unlock();
};

                         // ===================
                         // class SpinLockGuard
                         // ===================

/// This type implements a scoped guard for `SpinLock`: it acquires the lock
/// on construction and releases it on destruction (unless `release` has
/// been called).  It is neither copyable nor assignable.
class SpinLockGuard {

    // DATA
    SpinLock *d_lock_p; // lock proctored by this object

  private:
    // NOT IMPLEMENTED
    SpinLockGuard(const SpinLockGuard&);
    SpinLockGuard& operator=(const SpinLockGuard&);

  public:

    // CREATORS

    /// Create a proctor object that manages the specified `lock`.  Invoke
    /// `lock->lock()`.
    explicit SpinLockGuard(SpinLock *lock);

    /// Destroy this proctor object and invoke `unlock()` on the lock
    /// managed by this object.
    ~SpinLockGuard();

    // MANIPULATORS

    /// Return the lock pointer that was provided at construction and stop
    /// managing it.  (Subsequent calls to `release()` will return null and
    /// the destruction of this object will not affect the lock.)  The lock
    /// status is not changed by this call.
    SpinLock *release();
};

// ============================================================================
//                             INLINE DEFINITIONS
// ============================================================================

                             // --------------
                             // class SpinLock
                             // --------------
// CREATORS
#ifndef BSLS_SPINLOCK_USES_AGGREGATE_INITIALIZATION
// Value-initializing 'd_state' yields 0, the unlocked state.
constexpr SpinLock::SpinLock()
: d_state()
{
}

// The 'SpinLock_MemberInitializer' tag argument is ignored; this overload
// exists only to keep the legacy 'd_lock(SpinLock::s_unlocked)'
// member-initialization idiom compiling when constexpr constructors are in
// use.
constexpr SpinLock::SpinLock(const SpinLock_MemberInitializer&)
: d_state()
{
}
#endif

// PRIVATE CLASS METHODS
inline
void SpinLock::doBackoff(int *count) {
    // Escalating backoff: cheap CPU pauses first, then scheduler yields,
    // and finally full sleeps once contention persists.

    const int k_PAUSE_LIMIT = 10;  // attempts handled with 'pause'
    const int k_YIELD_LIMIT = 25;  // attempts handled with 'yield'

    const int attempt = (*count)++;
    if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(attempt < k_PAUSE_LIMIT)) {
        pause();
    } else if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(attempt < k_YIELD_LIMIT)) {
        yield();
    } else {
        sleepMillis(10);
    }
}


inline
void SpinLock::sleepMillis(int milliseconds)
{
#ifdef BSLS_PLATFORM_OS_WINDOWS
    ::Sleep(milliseconds);
#else
    // Split the duration into whole seconds and remaining nanoseconds.
    // POSIX requires 'tv_nsec' to be in the range [0, 999999999]; computing
    // '1000 * 1000 * milliseconds' directly would exceed that range for any
    // 'milliseconds >= 1000' (causing 'nanosleep' to fail with EINVAL and
    // not sleep at all) and could overflow 'int' for large arguments.
    timespec naptime;
    naptime.tv_sec  = milliseconds / 1000;
    naptime.tv_nsec = (milliseconds % 1000) * 1000 * 1000;
    nanosleep(&naptime, 0);
#endif
}

inline
void SpinLock::yield() {
#ifdef BSLS_PLATFORM_OS_WINDOWS
    // A zero-millisecond 'SleepEx' relinquishes the remainder of the current
    // time slice to another ready thread, returning immediately if none is
    // ready.
    ::SleepEx(0, 0);
#else
    sched_yield();
#endif
}

// MANIPULATORS
inline
void SpinLock::lock() {
    // 'lock' is an alias for the non-backoff variant; callers that want
    // backoff behavior must call 'lockWithBackoff' explicitly.
    lockWithoutBackoff();
}

inline
void SpinLock::lockWithBackoff() {
    int backoffCount = 0;
    for (;;) {
        // Implementation note: the leading acquire-load is not logically
        // necessary, but screening out attempts while the lock is visibly
        // held may reduce memory barrier costs when spinning.
        if (e_UNLOCKED == AtomicOperations::getIntAcquire(&d_state)
         && e_UNLOCKED == AtomicOperations::swapIntAcqRel(&d_state,
                                                          e_LOCKED)) {
            return;                                                   // RETURN
        }
        doBackoff(&backoffCount);
    }
}

inline
void SpinLock::lockWithoutBackoff() {
    // Implementation note: the leading acquire-load is not logically
    // necessary, but skipping the more expensive read-modify-write
    // ('swapIntAcqRel') while the lock is observed to be held may reduce
    // memory barrier costs when spinning.
    while (e_UNLOCKED != AtomicOperations::getIntAcquire(&d_state)
        || e_UNLOCKED != AtomicOperations::swapIntAcqRel(&d_state,
                                                         e_LOCKED)) {
        // spin
    }
}

inline
int SpinLock::tryLock(int numRetries) {
    int backoffCount = 0;
    for (;;) {
        // See 'lock' for the rationale behind the leading acquire-load.
        if (e_UNLOCKED == AtomicOperations::getIntAcquire(&d_state)
         && e_UNLOCKED == AtomicOperations::swapIntAcqRel(&d_state,
                                                          e_LOCKED)) {
            return 0;                                                 // RETURN
        }
        doBackoff(&backoffCount);
        if (0 == numRetries--) {
            return -1;                                                // RETURN
        }
    }
}

inline
void SpinLock::unlock() {
    // In safe builds, detect an unlock of a lock that is not held (which
    // indicates a missing or unbalanced 'lock' call by the caller).
    BSLS_ASSERT_SAFE(e_LOCKED == AtomicOperations::getInt(&d_state));

    AtomicOperations::setIntRelease(&d_state, e_UNLOCKED);
}

                          // -------------------
                          // class SpinLockGuard
                          // -------------------
inline
SpinLockGuard::SpinLockGuard(SpinLock *lock)
: d_lock_p(lock)
{
    BSLS_ASSERT_SAFE(0 != lock);

    // Acquire the managed lock immediately; it is released in the
    // destructor unless 'release' is called first.
    d_lock_p->lock();
}

inline
SpinLockGuard::~SpinLockGuard()
{
    // A null pointer indicates the guard was disarmed via 'release'.
    SpinLock *managed = d_lock_p;
    if (managed) {
        managed->unlock();
    }
}

// MANIPULATORS
inline
SpinLock *SpinLockGuard::release() {
    // Hand back the managed lock (still in its current state) and disarm
    // this guard so its destructor becomes a no-op.
    SpinLock *result = d_lock_p;
    d_lock_p = 0;
    return result;
}

}  // close package namespace
}  // close enterprise namespace

#undef BSLS_SPINLOCK_USES_AGGREGATE_INITIALIZATION

#endif

// ----------------------------------------------------------------------------
// Copyright 2020 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------
